library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cowplot)
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
library(here)
## here() starts at /Users/julianechave/lab/projects/enzyme_evo_str_analysis
# library(patchwork)
# library(ggpubr)
# library(ggdist)
# library(grid)
# library(gridExtra)

# library(stats) # For lm

theme_set(theme_minimal())
theme_set(theme_cowplot(font_size = 10))

Parameters

mcsa_id_case <- 2

Input

## Dataset
dataset <- read_csv(here("data", "final_dataset.csv"))  %>% 
  mutate(mcsa_id = as.character(mcsa_id))
dataset
## Profiles
profiles <- read_csv(here("data", "final_dataset_profiles.csv"))  %>% 
  mutate(mcsa_id = as.character(mcsa_id))
profiles
## Goodness of fit of scam models

data_gof <- read_csv(here("data", "final_dataset_gof.csv"))  %>% 
  mutate(mcsa_id = as.character(mcsa_id))
data_gof
## Join dataset and gof

dataset_and_gof <-  dataset %>% 
  inner_join(data_gof) 
dataset_and_gof
## Active-site residues (for fig_asite)

data_asite_residues <- read_csv(here("data", "final_dataset_residues.csv")) %>%
  transmute(mcsa_id = as.character(mcsa_id), 
            ref_pdb_chain_id = pdb_chain_id, 
            ref_pdb_site = pdb_site,
            uniprot_aa, function_location, function_type, 
            function_group, funct = `function`, 
            function_emo, main_annotation) %>%
  left_join(profiles) %>%
  filter(mcsa_id %in% profiles$mcsa_id)

Observed profiles and patterns

source(here("code", "plot_obs.R"))
plot <- plot_obs(profiles, data_gof, 2)
print(plot)

ggsave(here("figures", "fig_obs.pdf"), plot, width = 6.5, height = 4.1)

Models vs. observations

source(here("code", "plot_fits_vs_obs.R"))
plot <- plot_fits_vs_obs(profiles, data_gof, 2)
print(plot)

ggsave(here("figures", "fig_fit_vs_obs.pdf"), plot, width = 6.5, height = 6.8)

Active site divergence

source(here("code/plot_asite.R"))
plot <- plot_asite(profiles, data_gof, data_asite_residues)
print(plot)

ggsave(here("figures", "fig_asite.pdf"), plot, width = 6.5, height = 3.5)

Split s1 and s2 contributions

source(here("code", "plot_split.R"))
plot <- plot_split(profiles, data_gof, 2)
print(plot)

ggsave(here("figures", "fig_split.pdf"), plot, width = 6.5, height = 4.1)

shapley map and examples

# Source the plotting function
source(here("code", "plot_shapley_map.R"))

# Create the plot
plot <- plot_shapley_map(
    data_gof = data_gof,
    dataset = dataset,
    mcsa_id_examples = c(858, 15, 2, 252, 908),
    mcsa_id_outliers = c(749, 258)
)

# Display the plot
print(plot)

# Save the plot with specified dimensions
ggsave(
    here("figures", "fig_shapley_map.pdf"),
    plot,
    width = 5.5,
    height = 4.5 
)